################################################
### Assess Model Fit After Demand Estimation ###
################################################

memory.limit(size=150000)
setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data") # Office computer directory


# Define sample
	# We have to divide up the data to run this code for all households	
	# I will divide up by the number of years the household appears in the sample
	# There are roughly 2.6 million
	# I will try dividing this up into 10 
	
# Load Sample-Independent Data
data <- get(load("ca_enrollment_data_AUG022019")) # "Cleaned" Covered California enrollment data
households <- get(load("ca_household_data_AUG022019")) # "Cleaned" Covered California household data
plan_data <- get(load("ca_plan_characteristics_AUG032019_small")) 
age_rating_factors <- read.csv("age_rating_factors.csv",row.names=1) # CCIIO default rating curve
poverty_guidelines <- read.csv("poverty_guidelines.csv",row.names=1) # Poverty guidelines
rating_areas <- read.csv("ca_rating_areas.csv",row.names = 1) # California county-rating area mapping
zip3_choices <- read.csv("zip3_choices2.csv",row.names = 1) # choice set by 3 digit zip and rating area
product_definitions <- read.csv("product_definitions.csv",row.names = 1) # definitions of column names in zip3_choices
contribution_percentages <- read.csv("contribution_percentages.csv",row.names = 1) # ACA contribution percentages
geog_factors <- read.csv("ca_cms_geographic_factors.csv",header=T,row.names=1)
outside_logit <- get(load("sipp_logit"))
reduced_form_spec <- get(load("reduced_form"))
set.seed(2)

# Drop flagged households
households <- households[!households$flagged,]
data <- data[!data$flagged,]


zipchoices <- as.matrix(zip3_choices[,4:ncol(zip3_choices)])
product_definitions$insurer <- as.character(product_definitions$insurer)
product_definitions$plan_network_type <- as.character(product_definitions$plan_network_type)
single_product_insurers <- c("Chinese_Community","Contra_Costa","Kaiser","LA_Care","Molina",
			"Oscar","United","Valley","Western")
large_insurers <- c("Anthem","Blue_Shield","Kaiser","Health_Net","Molina")
		
#### Predict who left market
		
	# Add variables
			
		# Income brackets
		households$FPL_bracket <- "138orless"
		households[households$FPL > 1.38 & households$FPL <= 2.50,"FPL_bracket"] <- "138to250"
		households[households$FPL > 2.50 & households$FPL <= 4,"FPL_bracket"] <- "250to400"
		households[households$FPL > 4,"FPL_bracket"] <- "400ormore"
				
		# Age Brackets
		households$perc_18to34 <- households$perc_18to25 + households$perc_26to34
		households$perc_35to54 <- households$perc_35to44 + households$perc_45to54
				

	# Determine in/out of market
	households$out_of_market <- 0
	out_of_market_predictions <- as.numeric(predict(outside_logit,newdata=households[is.na(households$plan_number_nocsr),],type="response") >=
		runif(nrow(households[is.na(households$plan_number_nocsr),])))
	households[is.na(households$plan_number_nocsr),"out_of_market"] <- out_of_market_predictions
			
	# Remove records from market 
	households <- households[households$out_of_market == 0,]
	data <- data[data$household_year %in% rownames(households),]
		
gc()		
		
		
#### Form 5% Holdout Sample (5% of households, not records)
	
	set.seed(6)
	sample_size <- round(.05 * length(unique(households$household_id)))
	estimation_sample_household_ids <- sort(sample(unique(households$household_id),size=sample_size,replace=FALSE))
	holdout_sample_household_ids <- sort(sample(setdiff(unique(households$household_id),estimation_sample_household_ids),size=sample_size,replace=FALSE))
	keep_records <- which(households$household_id %in% holdout_sample_household_ids) 
	households <- households[keep_records,]
	
	# Drop 2019 households
	households <- households[households$year < 2019,]
	data <- data[data$household_year %in% rownames(households),]			
		
#### Create Samples

	# Number of years in panel
	years_in_panel <- by(households$household_id,households$household_id,length)
	households$years_in_panel <- 0
	households$years_in_panel <- years_in_panel[as.character(households$household_id)]
	
	# Nth year in panel (what year in sequence)
	households$nth_year <- 0
	
		# First year
		first_years <- by(households$year,households$household_id,min)
		households[paste(rownames(first_years),first_years,sep="_"),"nth_year"] <- 1 
		
		# Second year
		second_years <- by(households[households$nth_year == 0 & households$years_in_panel >= 2,"year"],
			households[households$nth_year == 0 & households$years_in_panel >= 2,"household_id"],min)
		households[paste(rownames(second_years),second_years,sep="_"),"nth_year"] <- 2 
		
		# Third year
		third_years <- by(households[households$nth_year == 0 & households$years_in_panel >= 3,"year"],
			households[households$nth_year == 0 & households$years_in_panel >= 3,"household_id"],min)
		households[paste(rownames(third_years),third_years,sep="_"),"nth_year"] <- 3 
		
		# Fourth year
		fourth_years <- by(households[households$nth_year == 0 & households$years_in_panel >= 4,"year"],
			households[households$nth_year == 0 & households$years_in_panel >= 4,"household_id"],min)
		households[paste(rownames(fourth_years),fourth_years,sep="_"),"nth_year"] <- 4 
		
		# Fifth year
		fifth_years <- by(households[households$nth_year == 0 & households$years_in_panel >= 5,"year"],
			households[households$nth_year == 0 & households$years_in_panel >= 5,"household_id"],min)
		households[paste(rownames(fifth_years),fifth_years,sep="_"),"nth_year"] <- 5
		
	# nonconsecutives are an issue, but let's ignore for now
	
	# Divide up into 9 roughly equal groups by number of years in panel
		
		households$group <- 1
		
		two_year_household_ids <- unique(households[households$years_in_panel == 2,"household_id"])
		households[households$household_id %in% two_year_household_ids[1:(round(length(two_year_household_ids)/2)-1)],"group"] <- 2 
		households[households$household_id %in% two_year_household_ids[round(length(two_year_household_ids)/2):length(two_year_household_ids)],"group"] <- 3 
		
		three_year_household_ids <- unique(households[households$years_in_panel == 3,"household_id"])
		households[households$household_id %in% three_year_household_ids[1:(round(length(three_year_household_ids)/2)-1)],"group"] <- 4 
		households[households$household_id %in% three_year_household_ids[round(length(three_year_household_ids)/2):length(three_year_household_ids)],"group"] <- 5 
		
		fourth_year_household_ids <- unique(households[households$years_in_panel == 4,"household_id"])
		households[households$household_id %in% fourth_year_household_ids[1:(round(length(fourth_year_household_ids)/2)-1)],"group"] <- 6 
		households[households$household_id %in% fourth_year_household_ids[round(length(fourth_year_household_ids)/2):length(fourth_year_household_ids)],"group"] <- 7 
		
		fifth_year_household_ids <- unique(households[households$years_in_panel == 5,"household_id"])
		households[households$household_id %in% fifth_year_household_ids[1:(round(length(fifth_year_household_ids)/2)-1)],"group"] <- 8 
		households[households$household_id %in% fifth_year_household_ids[round(length(fifth_year_household_ids)/2):length(fifth_year_household_ids)],"group"] <- 9 
		
#### Predict base plan premiums for CF approach

	plans <- get(load("ca_plan_characteristics_AUG032019_small")) # plan characteristics (includes same plans in different rating areas and years)
	geog_factors <- read.csv("ca_cms_geographic_factors.csv",header=T,row.names=1)
	
	# Clean-up Steps
	
	product_covariates <- c("Premium","AV","Silver","HMO","Anthem","Blue_Shield","Kaiser","Health_Net","Anthem_HMO")
	instrumental_variables <- c("geog_factor","Hausman_instrument","HMO_own","HMO_other","AV_own","AV_other")
	
	# Now we can drop the "uninsured" column from the choices and premiums object, and the uninsured row from the plan object
	plans <- plans[!is.na(plans$region),]

	# Drop CSR plans
	plans <- plans[!plans$AV %in% c(0.73,0.87,0.94),]

	# Drop 2019
	plans <- plans[plans$Year < 2019,]

	# Drop Catastrophic
	plans <- plans[plans$Metal_Level != "Minimum Coverage",]
	
	# Remove Duplicate Bronze and Small Plans
	plans$region_year <- paste(plans$region,plans$Year,sep="_")
	plans$unique_id_nohsacat <- paste(plans$Plan_Name_Small_NOHSACAT,plans$Year,plans$region,sep="_")	
	duplicated_plans <- plans[which(duplicated(plans$unique_id_nohsacat)),"unique_id_nohsacat"]
	for(mkt_yr in unique(plans$region_year)) {
		mkt_duplicated_plans <- intersect(duplicated_plans,plans[plans$region_year == mkt_yr,"unique_id_nohsacat"])
		for(j in mkt_duplicated_plans) {
			plans[plans$region_year == mkt_yr & plans$unique_id_nohsacat == j,"Premium"] <- min(plans[plans$region_year == mkt_yr & plans$unique_id_nohsacat == j,"Premium"])
			plans[plans$region_year == mkt_yr & plans$unique_id_nohsacat == j,"Hausman_instrument"] <- min(plans[plans$region_year == mkt_yr & plans$unique_id_nohsacat == j,"Hausman_instrument"])
		}
	}	
	plans <- plans[!duplicated(plans$unique_id_nohsacat),]
		
	# Define covariates
		
		year_FEs <- paste("Year",c(2015:2018),sep="") 
		market_FEs <- paste("Rating_Area",c(2:19),sep="")
		covariates <- c(product_covariates,year_FEs,market_FEs,instrumental_variables)
		
		
	# Create Variables
			
		# HMO Dummy (Lump EPO/PPO into one)
		plans$HMO <- as.numeric(plans$PLAN_NETWORK_TYPE == "HMO")
		plans$Anthem_HMO <- plans$Anthem * plans$HMO
		
		# Year dummies
		for(yr in c(2015:2018)) {
			plans[,paste("Year",yr,sep="") ] <- as.numeric(plans$Year == yr)
		}
			
		# Geographic Cost Factors
		plans$geog_factor <- geog_factors[paste(plans$region,plans$Year,sep="_"),"GCF"]
		
		# Rating Area Dummies
		for(mkt in c(2:19)) {
			plans[,paste("Rating_Area",mkt,sep="")] <- as.numeric(plans$Rating_Area == mkt)
		}
		
		# Rating Areas
		plans$Rating_Area <- as.factor(plans$Rating_Area)
		
		plans$Premium <- plans$Premium/1.278 # convert from 40-year-old to 21-year-old premium
			
	# Perform reduced form		
	reduced_form <- lm(Premium ~ .,data=plans[,covariates]) 		
	plans$residual <- residuals(reduced_form)		
		
	rownames(plans) <- paste(plans$Plan_Name_Small_NOHSACAT,plans$Year,plans$region,sep="")
	
num_households_prev_groups <- 0		

	
	##### Choice Matrix (households by plans)
		# 1 if household selected plan, 0 if household did not select plan, NA if plan not in choice set
		# Complication: households that selected multiple plans
		
		plan_names <- sort(unique(plan_data$Plan_Name2))
		plan_numbers <- 1:length(plan_names)
		names(plan_numbers) <- plan_names
		#data$plan_number <- NA
		#data[!is.na(data$plan_id),"plan_number"] <- plan_numbers[data[!is.na(data$plan_id),"plan_name"]]
		data[is.na(data$plan_id),"plan_number"] <- max(plan_numbers) + 1
		plan_data$plan_number <- plan_numbers[plan_data$Plan_Name2]
		
		# Assign an integer to each plan (eliminating small plans
		plan_names_small <- sort(unique(plan_data$Plan_Name_Small))
		plan_numbers_small <- 1:length(plan_names_small)
		names(plan_numbers_small) <- plan_names_small
		#data$plan_number_small <- NA
		#data[!is.na(data$plan_id),"plan_number_small"] <- plan_numbers_small[data[!is.na(data$plan_id),"plan_name"]]
		#data[is.na(data$plan_id),"plan_number_small"] <- max(data$plan_number_small,na.rm=TRUE) + 1
		plan_data$plan_number_small <- plan_numbers_small[plan_data$Plan_Name_Small]
		
		# In this version, we are going to eliminate CSR, HSA, and Catastrophic plans
		plan_names_nohsacat <- sort(unique(plan_data$Plan_Name_Small_NOHSACAT))
		plan_numbers_nohsacat <- 1:length(plan_names_nohsacat)
		names(plan_numbers_nohsacat) <- plan_names_nohsacat
		plan_data$plan_number_nohsacat <- plan_numbers_nohsacat[plan_data$Plan_Name_Small_NOHSACAT]
		
		# This will link the full plan number to the nohsacat plan number
		reference_numbers <- plan_numbers
		for(j in 1:length(reference_numbers)){
			reference_plan_name <- unique(plan_data[which(plan_data$Plan_Name == names(plan_numbers)[j]),"Plan_Name_Small_NOHSACAT"]) 
			reference_numbers[j] <- plan_numbers_nohsacat[reference_plan_name]
		}
		
				
		# Initialize choice and new plans matrix
		choices <- matrix(NA,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
		
		# Households
		households$change_in_products <- 0
		households$change_in_insurers <- 0
		households$change_in_large_insurers <- 0
		households$market_turnover <- 0
		
		# Choices for the exchange population for which we have 3-digit zip (plan has to be offered in year and zip)
		for(i in rownames(zip3_choices)) {
			
				t <- zip3_choices[i,"Year"]
			
				# Get the broad product categories available (insurer/network type/etc. - no metal)
				region_year_plans <- which(plan_data$Year == t &
					plan_data$region == zip3_choices[i,"Region"])
				available_products <- zipchoices[i,]
				available_products <- names(available_products)[!is.na(available_products)]
				available_insurers <- unique(product_definitions[available_products,"insurer"])
				large_available_insurers <- intersect(large_insurers,available_insurers)
				available_plans <- c()
				
				# Last year's plans
				if(t > 2014) {
					lastyr_zip <- paste(zip3_choices[i,"zip3"],zip3_choices[i,"Region"],t-1,sep="_")
					lastyr_region_year_plans <- which(plan_data$Year == t-1 &
						plan_data$region == zip3_choices[lastyr_zip,"Region"])
					lastyr_available_products <- zipchoices[lastyr_zip,]
					lastyr_available_products <- names(lastyr_available_products)[!is.na(lastyr_available_products)]
					lastyr_available_insurers <- unique(product_definitions[lastyr_available_products,"insurer"])
					lastyr_large_available_insurers <- intersect(large_insurers,lastyr_available_insurers)
					lastyr_available_plans <- c()
				}
				
				# Now get the specific plans available
							
				if ("Anthem" %in% available_insurers) {
					if("Anthem_HMO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_PPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO_MSP" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 1)))
					}
					if("Anthem_PPO_MSP" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 1)))
					}
				} 
				if ("Blue_Shield" %in% available_insurers) {
					# could be HMO, EPO, PPO
					if("Blue_Shield_HMO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
					}
					if("Blue_Shield_EPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
					}
					if("Blue_Shield_PPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
					}
				} 
				if ("Health_Net" %in% available_insurers) {
					# could be HMO, HSP, EPO, PPO
					if("Health_Net_HMO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
					}
					if("Health_Net_HSP" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
					}
					if("Health_Net_EPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
					}
					if("Health_Net_PPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
					}
				}
				if ("SHARP" %in% available_insurers) {
					# could be network 1 or 2
					if("Sharp1" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "SHARP" & plan_data$Network_num == 1 & 
								!is.na(plan_data$Network_num))))
					}
					if("Sharp2" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "SHARP" & plan_data$Network_num == 2 & 
								!is.na(plan_data$Network_num))))
					}
				}
				if (any(single_product_insurers %in% available_insurers)) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Insurer %in% intersect(available_insurers,single_product_insurers))))
				}
				
				households_to_update <- which(households$zip_region_year == i & !is.na(households$zip_region_year))
				available_plan_names <- unique(as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"]))
				choices[households_to_update,available_plan_names] <- 0
				
					if(t > 2014) {
						if(length(available_products) > length(lastyr_available_products)) {
							households[households_to_update,"change_in_products"] <- 1 # Net entry
						} else if (length(available_products) < length(lastyr_available_products)) {
							households[households_to_update,"change_in_products"] <- -1 # Net exit
						}
						
						if(length(available_insurers) > length(lastyr_available_insurers)) {
							households[households_to_update,"change_in_insurers"] <- 1 # Net entry
						} else if (length(available_insurers) < length(lastyr_available_insurers)) {
							households[households_to_update,"change_in_insurers"] <- -1 # Net exit
						}
						
						if(length(large_available_insurers) > length(lastyr_large_available_insurers)) {
							households[households_to_update,"change_in_large_insurers"] <- 1 # Net entry
						} else if (length(large_available_insurers) < length(lastyr_large_available_insurers)) {
							households[households_to_update,"change_in_large_insurers"] <- -1 # Net exit
						}
						
						lastyr_large_available_insurers
						
						number_in_market <- sum(households[!is.na(households$plan_number_nocsr) & households$zip_region_year == i &
								!is.na(households$zip_region_year),"weight"])
						
						if(number_in_market > 0) {
							households[households_to_update,"market_turnover"] <- 
								sum(households[is.na(households$previous_plan_number) & 
									!is.na(households$plan_number_nocsr) & households$zip_region_year == i &
									!is.na(households$zip_region_year),"weight"])/number_in_market
						}	
						
					}
		}
		
		# Restrictions on who can pick ACS plans
			# Must be subsidy eligible and have income below 250%
			# "Unenhanced silver" is not in choice set of ACS-eligibles
			# NOTE: this is not quite accurate if households have both subsidized and unsubsidized members
		
		acs_eligibles73 <- which(households$FPL > 2 & households$FPL <= 2.5 & households$subsidized_members > 0)
		acs_eligibles87 <- which(households$FPL > 1.5 & households$FPL <= 2 & households$subsidized_members > 0)
		acs_eligibles94 <- which(households$FPL <= 1.5 & households$subsidized_members > 0)
		
		# Household AV matrix 
			# Just the plan AV for non-silver plans
			# CSR plan AV for silver plans
		
		AV_matrix <- matrix(0,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
		
		AV_matrix[,unique(plan_data[plan_data$Metal_Level %in% c("Bronze"),"Plan_Name_Small_NOHSACAT"])] <- 0.6
		AV_matrix[,unique(plan_data[plan_data$Metal_Level %in% c("Gold"),"Plan_Name_Small_NOHSACAT"])] <- 0.8
		AV_matrix[,unique(plan_data[plan_data$Metal_Level %in% c("Platinum"),"Plan_Name_Small_NOHSACAT"])] <- 0.9
		
		AV_matrix[,unique(plan_data[plan_data$Metal_Level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.7
		AV_matrix[acs_eligibles73,unique(plan_data[plan_data$Metal_Level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.73
		AV_matrix[acs_eligibles87,unique(plan_data[plan_data$Metal_Level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.87
		AV_matrix[acs_eligibles94,unique(plan_data[plan_data$Metal_Level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.94
			
		gc()
						
			
		# Input choice
			# Complication with households choosing multiple plans
			# Use choice of household head
		
		plan_data <- rbind(plan_data,rep(NA,dim(plan_data)[2]))
		plan_data$Plan_Name_Small_NOHSACAT <- as.character(plan_data$Plan_Name_Small_NOHSACAT)
		plan_data[nrow(plan_data),"Plan_Name_Small_NOHSACAT"] <- "Uninsured"
		#plan_data[nrow(plan_data),"plan_unique_id"] <- "Uninsured"
		plan_data[nrow(plan_data),"plan_number_nohsacat"] <- ncol(choices) + 1
		plan_data[nrow(plan_data),"plan_number"] <- max(plan_data$plan_number,na.rm=T) + 1
		#rownames(plan_data)[nrow(plan_data)] <- "Uninsured"
		
		assign_head_plan <- function(x) {
			return(as.integer(x[1]))
		}
		
		assign_oldest <- function(x) {
			return(as.character(x[1]))
		}
		
		households$plan_number <- as.numeric(by(data[,"plan_number"],data[,"household_year"],assign_head_plan)[rownames(households)])
		households$plan_name <- by(data[,"plan_name"],data[,"household_year"],assign_oldest)[rownames(households)]
		households$plan_id <- as.numeric(by(data[,"plan_id"],data[,"household_year"],assign_head_plan)[rownames(households)])
		#households[households$plan_number == max(plan_data$plan_number),"plan_name"] <- "Uninsured"
		
		households$plan_number_nohsacat <- reference_numbers[households$plan_number]
		households[is.na(households$plan_id),"plan_number_nohsacat"] <- ncol(choices) + 1	
			
		choices <- cbind(choices,rep(0,nrow(choices)))
		colnames(choices) <- c(colnames(choices)[1:(ncol(choices)-1)],"Uninsured")
		choices[cbind(seq(nrow(choices)),households$plan_number_nohsacat)] <- 1
		gc()
		
		AV_matrix <- cbind(AV_matrix,rep(0,nrow(AV_matrix)))
		colnames(AV_matrix) <- c(colnames(AV_matrix)[1:(ncol(AV_matrix)-1)],"Uninsured")
		gc()
		
		# Number of choices in household choice set (deduct 1 because of the uninsured option)
		households$num_choices <- rowSums(!is.na(choices)) - 1
		
		gc()
		
	##### Previous plan choices

		# Previous plan choice is already in household object
		# You need to be careful with CSR plans (shift between CSR plans shouldn't count as change
		
		households$previous_plan_number_nohsacat <- reference_numbers[households$previous_plan_number]

		
		previous_choices <- matrix(0,nrow(choices),ncol(choices),dimnames=list(rownames(choices),colnames(choices)))
		
		for(j in unique(households$previous_plan_number_nohsacat)) {
		
			# Get households that chose j in previous year
			households_on_j <- which(households$previous_plan_number_nohsacat == j)
		
			# Get all plans that would be considered equivalent to j
			#j_plan_name <- colnames(choices)[j]
			#equivalent_plans <- unique(plan_data[plan_data$Plan_Name_Small_NOCSR == j_plan_name,"plan_number_small"])
		
			# Input previous choice (all equivalent plans get a 1, will eliminate extras in make.julia.V2.r)
			#previous_choices[households_on_j,equivalent_plans] <- 1
			previous_choices[households_on_j,j] <- 1
		}
		
		#previous_choices <- cbind(previous_choices,NA)
		#colnames(previous_choices)[ncol(previous_choices)] <- "Uninsured"
		
	##### Premium Object

	#choices <- get(load("ca_choice_matrix_FEB082016"))

		premiums <- choices
		premiums_predicted <- choices
		
		catastrophic_plans <- which(plan_data$metal_level == "Minimum Coverage")
		
		# Choices for the exchange population for which we have 3-digit zip (plan has to be offered in year and zip)
		for(i in rownames(zip3_choices)) {
			
				region_year_plans <- which(plan_data$Year == zip3_choices[i,"Year"] &
					plan_data$region == zip3_choices[i,"Region"])
			
				# Get the broad product categories available (insurer/network type/etc. - no metal)
				available_products <- zipchoices[i,]
				available_products <- names(available_products)[!is.na(available_products)]
				available_insurers <- unique(product_definitions[available_products,"insurer"])
				
				# Now get the specific plans available
				
				available_plans <- c()
				if ("Anthem" %in% available_insurers) {
					if("Anthem_HMO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_PPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO_MSP" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 1)))
					}
					if("Anthem_PPO_MSP" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 1)))
					}
				} 
				if ("Blue_Shield" %in% available_insurers) {
					# could be HMO, EPO, PPO
					if("Blue_Shield_HMO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
					}
					if("Blue_Shield_EPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
					}
					if("Blue_Shield_PPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
					}
				} 
				if ("Health_Net" %in% available_insurers) {
					# could be HMO, HSP, EPO, PPO
					if("Health_Net_HMO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
					}
					if("Health_Net_HSP" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
					}
					if("Health_Net_EPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
					}
					if("Health_Net_PPO" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
					}
				}
				if ("SHARP" %in% available_insurers) {
					# could be network 1 or 2
					if("Sharp1" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "SHARP" & plan_data$Network_num == 1 & 
								!is.na(plan_data$Network_num))))
					}
					if("Sharp2" %in% available_products) {
						available_plans <- c(available_plans,
							intersect(region_year_plans,
								which(plan_data$Insurer == "SHARP" & plan_data$Network_num == 2 & 
								!is.na(plan_data$Network_num))))
					}
				}
				if (any(single_product_insurers %in% available_insurers)) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Insurer %in% intersect(available_insurers,single_product_insurers))))
				}
				
				available_plans <- unique(available_plans)
				available_plan_names <- as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"])
				
				# These are the premiums before merging the small plans and HSA/catastrophic/bronze
				premium_plans <- plan_data[available_plans,"Premium"]
				names(premium_plans) <- available_plan_names
				
				#premium_plans_predicted <- plan_data[available_plans,"premium_predicted"]
				#names(premium_plans_predicted) <- available_plan_names
			
				
				# Here we need to do something about the small plans
						# Solution 1: take the minimum for small plans (what I do here)
						# Solution 2: average
						# NOTE: this ends up catching 2 Health Net plans (an HSP and PPO get merged) - no big deal
					
				# For bronze/hsa/cat - take minimum of the bronze plans	
					
					duplicate_plans <- unique(available_plan_names[duplicated(available_plan_names)])					
					for(s in duplicate_plans) {
						plan_indices <- which(available_plan_names == s)
						plans_to_merge <- available_plans[plan_indices]
						plans_to_merge <- setdiff(plans_to_merge,catastrophic_plans)
						premium_plans[plan_indices] <- min(plan_data[plans_to_merge,"Premium"])
						#premium_plans_predicted[plan_indices] <- min(plan_data[plans_to_merge,"premium_predicted"])
					}
				
				unique_available_plan_names <- which(!duplicated(available_plan_names))
				available_plan_names <- available_plan_names[unique_available_plan_names]
				premium_plans <- premium_plans[unique_available_plan_names]	
				#premium_plans_predicted <- premium_plans_predicted[unique_available_plan_names]	
				
				households_to_update <- which(households$zip_region_year == i & !is.na(households$zip_region_year))
				
				premiums[households_to_update,available_plan_names] <- households[households_to_update,"rating_factor"] %*% t(premium_plans)
				#premiums_predicted[households_to_update,available_plan_names] <- households[households_to_update,"rating_factor"] %*% t(premium_plans_predicted)
		}

		gc()
		
		# Insert Mandate Penalty as the Price of Being Uninsured
		# I am converting this penalty to a monthly basis 
		premiums[,"Uninsured"] <- households$penalty/12
		gc()
		
		# Apply Subsidy to all plans except catastrophic
		#catastrophic_plans <- unique(plan_data[plan_data$metal_level == "Minimum Coverage","Plan_Name_Small"])
		#noncatastrophic_plans <- which(!colnames(premiums) %in% catastrophic_plans)
		subsidies <- households$subsidy
		subsidies[is.na(subsidies)] <- 0
		
		unsubsidized_premiums <- premiums
		gc()
		premiums <- pmax(premiums - subsidies,0)
		gc()
		
		# Set premiums of plans not in choice set to NA
		premiums <- premiums * pmax(pmin(choices,1),1)	
		gc()
		subsidies <- unsubsidized_premiums - premiums
		gc()
		rm(unsubsidized_premiums)
		gc()
		subsidies[,ncol(subsidies)] <- 0
		gc()

		# Compute Residual
		#premiums_predicted <- pmax(premiums_predicted - subsidies,0)
		#residual <- premiums - premiums_predicted

		# Make Julia Object

			##### Initialize output object

				

				valid_choices <- as.vector(!is.na(t(choices)))	
				#julia_output <- expand.grid(colnames(choices),1:nrow(choices))[valid_choices,]
				julia_output <- expand.grid(colnames(choices),
					(num_households_prev_groups + 1):(num_households_prev_groups + nrow(households)))[valid_choices,]
				num_households_prev_groups <- num_households_prev_groups + nrow(households)
				gc()
				
				# Create household number index
				household_number_refs <- 1:nrow(households)
				names(household_number_refs) <- unique(julia_output[,2])
				julia_output$household_number <- household_number_refs[as.character(julia_output[,2])]
				
			##### Save year and weight from household object and then delete
				
				household_years <- rep(households[rownames(choices),"year"],each=ncol(choices))[valid_choices]	
				weight <- rep(households[rownames(choices),"weight"],each=ncol(choices))[valid_choices]
				penalties <- rep(households[rownames(choices),"penalty"]/12,each=ncol(choices))[valid_choices]
				gc()
					
			##### Reduce memory of premium and choice objects
				
				num_households <- nrow(choices)
				num_plans <- ncol(choices)
				
				# Reduce memory
				choices <- as.vector(t(choices))[valid_choices]
				previous_choices <- as.vector(t(previous_choices))[valid_choices]
				premiums <- as.vector(t(premiums))[valid_choices] - penalties
				avs <- as.vector(t(AV_matrix))[valid_choices]
				subsidies <- as.vector(t(subsidies))[valid_choices]
				#residual <- as.vector(t(residual))[valid_choices]
				gc()
				
			##### To send to Julia
				
				julia_output$choice <- choices
				gc()
				
				julia_output$previous_choice <- previous_choices
				julia_output[is.na(julia_output$previous_choice),"previous_choice"] <- 0
				
				julia_output$premium <- premiums
				#julia_output$residual <- residual
				julia_output$av <- avs
				julia_output$subsidy <- subsidies
				julia_output$penalty <- penalties
				
				julia_output$year <- household_years
				julia_output$weight <- weight	
				julia_output$uninsured_plan <- rep(c(rep(0,num_plans-1),1),num_households)[valid_choices]
				
				colnames(julia_output) <- c("plan_name","household_number_all","household_number","choice","previous_choice",
					"premium","av","subsidy","penalty","year","weight","uninsured_plan")
					
					
				julia_output$rating_area <- households[julia_output$household_number,"rating_area"]
				hrefs <- paste(julia_output[julia_output$plan_name != "Uninsured","plan_name"],
							   julia_output[julia_output$plan_name != "Uninsured","year"],
							   julia_output[julia_output$plan_name != "Uninsured","rating_area"],sep="")
				
				julia_output$residual <- 0
				julia_output[julia_output$plan_name != "Uninsured","residual"] <- plans[hrefs,"residual"]
		
				julia_output[julia_output$uninsured_plan == 1,c("premium","subsidy","residual")] <- 0
					
					
				filename <- paste("fit_julia_data.csv",sep="")
				write.csv(julia_output,file=filename,row.names=FALSE)	
	
	# Save Households
		
		fields <- sort(c("plan_name","plan_id","household_id","plan_number","FPL","household_size","enrollees",
			"perc_0to17","perc_18to25","perc_26to34","perc_35to44","perc_45to54","perc_asian","perc_black","perc_hispanic","perc_other", 
			"perc_male","rating_area","year","rating_factor","subsidy","subsidized_members",
			"penalty","weight","plan_number_nocsr","previous_plan_number","previous_plan_offered","SEP",
			"years_in_panel","nth_year"))
		
		# Save Data
		households <- households[,fields]
		gc()
		
		write.csv(households,file="fit_households.csv",row.names=FALSE)	
		

